Real-life examples¶

To better appreciate the performance of the model, it is also interesting to test it on real-life recordings (for which we have no ground truth for either the impulsive or the stationary sources).

In [38]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Audio
In [39]:
# Baseline separation modules (HPSS, wavelet) and the IS³ model under test.

import torch
from rendering.is3.model_wrapper import ModelWrapper
from rendering.is3.baselines import wavelet_script
from rendering.is3.baselines import hpss
# NOTE(review): ImpulsiveStationarySeparation is not used in this section —
# presumably needed by other cells of the notebook; confirm before removing.
from rendering.is3.dataloader_numpy import ImpulsiveStationarySeparation

# Global sample rate (Hz); every load/playback cell below relies on it.
sr = 44100


# HPSS baseline with separation margin 1.
hpss_module = hpss.HarmonicPercussiveDecomposition(
    nfft=2048,
    window_size=2048,
    overlap=0.75,
    margin=1.
)

# Same STFT configuration, but with margin 2. (used for Example 1).
hpss_module_2 = hpss.HarmonicPercussiveDecomposition(
    nfft=2048,
    window_size=2048,
    overlap=0.75,
    margin=2.
)


# Wavelet-filtering baseline ("db" — presumably a Daubechies wavelet; verify
# against WaveletBaseline's documentation), 13 decomposition levels.
wavelet_module = wavelet_script.WaveletBaseline(
    wavelet="db",
    level=13,
    sr=sr,
    ks=2.,
    ks_impulse=6.,
    kc=1.,
    kernel_size=1025,
)


# IS³ model loaded from configuration "014".
model = ModelWrapper(
    conf_name="014",
    job_id=None,
)
# Put the model in eval mode; `_ =` suppresses the module repr in the output.
_ = model.eval()
In [40]:
import librosa


def open_and_plot_audio(url, n_fft=2048, hop_length=512):
  """Load an audio file and plot its waveform and log-frequency spectrogram.

  Parameters
  ----------
  url : str
      Path (or URL) forwarded to ``librosa.load``.
  n_fft : int, optional
      FFT size for the STFT (default 2048). Previously hard-coded; kept as a
      default so existing calls behave identically.
  hop_length : int, optional
      Hop size between STFT frames (default 512).

  Returns
  -------
  numpy.ndarray
      The mono signal, resampled to the notebook-wide ``sr``.
  """
  # Resample to the global sample rate and mix down to mono.
  signal, _ = librosa.load(
      url,
      sr=sr,
      mono=True,)

  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), dpi=150)

  # Left panel: time-domain waveform with a time axis in seconds.
  ax1.plot(np.arange(len(signal)) / sr, signal)
  ax1.set_title('Audio Signal')
  ax1.set_xlabel('Time (s)')
  ax1.set_ylabel('Amplitude')

  # Right panel: magnitude spectrogram in dB, referenced to the peak value.
  # The same n_fft/hop_length are passed to both the STFT and specshow so the
  # axis scaling stays consistent.
  S = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
  S_dB = librosa.amplitude_to_db(np.abs(S), ref=np.max)

  img = librosa.display.specshow(
      S_dB,
      sr=sr,
      n_fft=n_fft,
      hop_length=hop_length,
      x_axis='time',
      y_axis='log',
      ax=ax2)
  fig.colorbar(img, ax=ax2, format='%+2.0f dB')
  ax2.set_title('Spectrogram')

  plt.tight_layout()
  plt.show()

  return signal

Example 1.¶

In [41]:
# Load the first real-life recording and visualize waveform + spectrogram.
complete_signal = open_and_plot_audio("audios/street.wav")
No description has been provided for this image
In [42]:
# Keep a 5 s excerpt (8 s -> 13 s) of the complete recording.
start, stop = 8 * sr, 13 * sr
real_mix = complete_signal[start:stop]

print("Real Mixture")
display(Audio(real_mix, rate=sr))
Real Mixture
Your browser does not support the audio element.
In [43]:
# HPSS baseline (margin=2.) on the street excerpt.
y_p_2, y_h_2, _, _ = hpss_module_2.forward(real_mix)

# Play the percussive (impulsive) estimate first, then the harmonic one.
for label, estimate in (("HPSS/Impulses", y_p_2),
                        ("HPSS/Stationary Background", y_h_2)):
    print(label)
    display(Audio(estimate, rate=sr))
HPSS/Impulses
Your browser does not support the audio element.
HPSS/Stationary Background
Your browser does not support the audio element.
In [44]:
# Wavelet-filtering baseline on the street excerpt.
wavelet_bkg, wavelet_impulse = wavelet_module.forward(real_mix)

# Impulsive estimate first, then the stationary background.
for label, estimate in (("Wavelet/Impulses", wavelet_impulse),
                        ("Wavelet/Stationary Background", wavelet_bkg)):
    print(label)
    display(Audio(estimate, rate=sr))
Wavelet/Impulses
Your browser does not support the audio element.
Wavelet/Stationary Background
Your browser does not support the audio element.
In [45]:
# IS³ model — the input is reshaped to a (1, samples) batch tensor.
mix_tensor = torch.tensor(real_mix).reshape(1, -1)
y_i, y_s = model.forward(mix_tensor)

# Play the impulsive estimate, then the stationary background.
for label, estimate in (("IS3/Impulses", y_i),
                        ("IS3/Stationary Background", y_s)):
    print(label)
    display(Audio(estimate[0].detach().numpy(), rate=sr))
IS3/Impulses
Your browser does not support the audio element.
IS3/Stationary Background
Your browser does not support the audio element.
In [46]:
# Stack the input and each method's impulse estimate on shared axes.
impulse_panels = [
    (real_mix, 'Input Signal'),
    (y_p_2, 'HPSS (margin=2) Impulse'),
    (wavelet_impulse, 'Wavelet Impulse'),
    (y_i[0].detach().numpy(), 'IS³ Impulse'),
]

fig, axs = plt.subplots(4, 1, figsize=(15, 9), sharex=True, sharey=True)
fig.suptitle('Comparison of Impulse Separation Methods (Real Recording)')

for ax, (waveform, title) in zip(axs, impulse_panels):
    ax.plot(waveform)
    ax.set_title(title)
    ax.set_ylabel('Amplitude')

# Only the bottom panel needs the x label (x-axis is shared).
axs[-1].set_xlabel('Sample')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [47]:
# Stack the input and each method's stationary-background estimate.
background_panels = [
    (real_mix, 'Input Signal'),
    (y_h_2, 'HPSS (margin=2) Background'),
    (wavelet_bkg, 'Wavelet Background'),
    (y_s[0].detach().numpy(), 'IS³ Background'),
]

fig, axs = plt.subplots(4, 1, figsize=(15, 9), sharex=True, sharey=True)
fig.suptitle(
    'Comparison of Stationary Background Separation Methods (Real Recording)')

for ax, (waveform, title) in zip(axs, background_panels):
    ax.plot(waveform)
    ax.set_title(title)
    ax.set_ylabel('Amplitude')

# Only the bottom panel needs the x label (x-axis is shared).
axs[-1].set_xlabel('Sample')

plt.tight_layout()
plt.show()
No description has been provided for this image

Example 2.¶

In [48]:
# Load the second real-life recording and visualize waveform + spectrogram.
complete_signal = open_and_plot_audio("audios/cafe.wav")
No description has been provided for this image
In [49]:
# 5 s excerpt (10 s -> 15 s) of the second recording.
start, stop = 10 * sr, 15 * sr
real_mix = complete_signal[start:stop]
display(Audio(real_mix, rate=sr))
Your browser does not support the audio element.
In [50]:
# HPSS margin=1. — NOTE(review): the original comment said "margin=2.", but
# `hpss_module` is the margin=1. instance (the margin=2. one is
# `hpss_module_2`, used in Example 1).

y_p, y_h, _, _ = hpss_module.forward(real_mix)

print("HPSS/Impulses")
display(Audio(y_p, rate=sr))

print("HPSS/Stationary Background")
display(Audio(y_h, rate=sr))
HPSS/Impulses
Your browser does not support the audio element.
HPSS/Stationary Background
Your browser does not support the audio element.
In [51]:
# Wavelet-filtering baseline on the café excerpt.
wavelet_bkg, wavelet_impulse = wavelet_module.forward(real_mix)

# Impulsive estimate first, then the stationary background.
for label, estimate in (("Wavelet/Impulses", wavelet_impulse),
                        ("Wavelet/Stationary Background", wavelet_bkg)):
    print(label)
    display(Audio(estimate, rate=sr))
Wavelet/Impulses
Your browser does not support the audio element.
Wavelet/Stationary Background
Your browser does not support the audio element.
In [52]:
# IS³ model — the input is reshaped to a (1, samples) batch tensor.
mix_tensor = torch.tensor(real_mix).reshape(1, -1)
y_i, y_s = model.forward(mix_tensor)

# Play the impulsive estimate, then the stationary background.
for label, estimate in (("IS3/Impulses", y_i),
                        ("IS3/Stationary Background", y_s)):
    print(label)
    display(Audio(estimate[0].detach().numpy(), rate=sr))
IS3/Impulses
Your browser does not support the audio element.
IS3/Stationary Background
Your browser does not support the audio element.
In [53]:
# Compare the impulse estimates for Example 2 on shared axes.
fig, axs = plt.subplots(4, 1, figsize=(15, 9), sharex=True, sharey=True)
fig.suptitle('Comparison of Impulse Separation Methods (Real Recording)')

# Input waveform
axs[0].plot(real_mix)
axs[0].set_title('Input Signal')
axs[0].set_ylabel('Amplitude')

# Plot HPSS impulse. `y_p` comes from `hpss_module` (built with margin=1.),
# so the panel is labeled margin=1 — the original title said margin=2,
# which mislabeled the figure.
axs[1].plot(y_p)
axs[1].set_title('HPSS (margin=1) Impulse')
axs[1].set_ylabel('Amplitude')

# Plot Wavelet impulse
axs[2].plot(wavelet_impulse)
axs[2].set_title('Wavelet Impulse')
axs[2].set_ylabel('Amplitude')

# Plot IS3 impulse (first item of the batch, detached from the graph)
axs[3].plot(y_i[0].detach().numpy())
axs[3].set_title('IS³ Impulse')
axs[3].set_ylabel('Amplitude')
axs[3].set_xlabel('Sample')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [54]:
# Compare the stationary-background estimates for Example 2 on shared axes.
fig, axs = plt.subplots(4, 1, figsize=(15, 9), sharex=True, sharey=True)
fig.suptitle(
    'Comparison of Stationary Background Separation Methods (Real Recording)')

# Input waveform
axs[0].plot(real_mix)
axs[0].set_title('Input Signal')
axs[0].set_ylabel('Amplitude')


# Plot HPSS background. `y_h` comes from `hpss_module` (built with
# margin=1.), so the panel is labeled margin=1 — the original title said
# margin=2, which mislabeled the figure.
axs[1].plot(y_h)
axs[1].set_title('HPSS (margin=1) Background')
axs[1].set_ylabel('Amplitude')

# Plot Wavelet background
axs[2].plot(wavelet_bkg)
axs[2].set_title('Wavelet Background')
axs[2].set_ylabel('Amplitude')

# Plot IS3 background (first item of the batch, detached from the graph)
axs[3].plot(y_s[0].detach().numpy())
axs[3].set_title('IS³ Background')
axs[3].set_ylabel('Amplitude')
axs[3].set_xlabel('Sample')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]: